import os
import sys

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

# Short aliases for the os.path helpers used throughout this script.
dirname = os.path.dirname
join = os.path.join

# Put this script's own directory on sys.path before importing utils_exog
# (presumably a module that lives next to this file -- confirm).
CURRENT_PATH = dirname(os.path.realpath(__file__))
sys.path.append(CURRENT_PATH)
from utils_exog import (  # noqa: E402 -- must follow the sys.path tweak above
    gen_exog_diff_trend,
    month_distance,
    if_month,
    gen_X_y_selfexog,
    pre_labeler,
)

# Script-level settings. The path, file-name and sheet-name strings are empty
# in this file and must be filled in before running.
relativedelta_month = 1
DATA_PATH = ''
SAVE_PATH = ''

# Read the source workbook into a DataFrame.
source_workbook = join(DATA_PATH, '')
data_total = pd.read_excel(source_workbook, sheet_name='')

# Keep everything from the fifth column onwards as the value matrix; the
# first four columns are left out of the numeric processing below.
data_total_val = data_total.iloc[:, 4:]

# Generate statistical features.
#
# The original code read `data_total_val_train` before it was ever assigned
# (NameError); the value matrix `data_total_val` is used as the input instead.
#
# Work on a column-reversed frame so that, in the original column order, the
# 12-wide rolling window at each column covers that column and the 11 columns
# to its right.
_reversed_vals = data_total_val.iloc[:, ::-1]

# Row-wise rolling mean for all rows at once: transpose so each original row
# becomes a column, roll down the rows, then transpose back.  This keeps the
# row index and column labels of `data_total_val`.  The 11 right-most columns
# (in original order) are NaN because their window is not yet full.
_trend_reversed = _reversed_vals.T.rolling(window=12).mean().T

# Restore the original column order for both the trend and the raw values.
data_total_val_train_trend_df = _trend_reversed.iloc[:, ::-1]
data_total_val_train = _reversed_vals.iloc[:, ::-1]

# Difference between each column and the column immediately to its right
# (the last column has no right neighbour and becomes NaN).
data_total_val_train_diff = data_total_val_train - data_total_val_train.shift(-1, axis=1)
# Deviation of each value from its 12-column rolling mean.
data_total_val_train_diffm = data_total_val_train - data_total_val_train_trend_df

# Pairs handed to gen_exog_diff_trend unchanged (presumably (start, end)
# window bounds -- confirm in utils_exog).
range_list = [(0, 3), (0, 6), (0, 9), (3, 6), (3, 9),
              (3, 12), (6, 9), (6, 12), (9, 12)]

# `asccode` was referenced by the calls below without ever being assigned
# (NameError).
# NOTE(review): assumed to be the 'asccode' column of data_total (the same
# column name is label-encoded further down in this script) -- confirm that
# this is what gen_exog_diff_trend expects.
asccode = data_total['asccode']

# Positional arguments shared by every gen_exog_diff_trend call; only the
# `cycle` keyword differs between the calls.
_exog_inputs = (
    data_total_val_train,
    data_total_val_train_diff,
    data_total_val_train_diffm,
    data_total_val_train_trend_df,
    asccode,
    range_list,
)

features_cycle0 = gen_exog_diff_trend(*_exog_inputs, cycle=0)
features_cycle12 = gen_exog_diff_trend(*_exog_inputs, cycle=12)
features_cycle24 = gen_exog_diff_trend(*_exog_inputs, cycle=24)

# NOTE(review): features_cycle24 is computed but not concatenated here, even
# though the result is named features_0_12_24 -- confirm whether it should be.
features_0_12_24 = pd.concat([features_cycle0, features_cycle12], axis=1)

# Label-encode several categorical columns.
col2label = ['asccode', 'city', 'province', 'area']
col2label_savepath = ''

data_total = pre_labeler(data_total, col2label, True, col2label_savepath)

# Build the X/y table from the encoded data and the generated features.
gen_X_y = gen_X_y_selfexog(data_total, features_0_12_24)

# Write the result out as an Excel workbook under SAVE_PATH.
output_workbook = join(SAVE_PATH, '')
gen_X_y.to_excel(output_workbook)
